In [4]:
import os, types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self):
    """Placeholder ``__iter__`` that is bound onto the object-storage stream below.

    NOTE(review): it returns 0 rather than an iterator, so it can only satisfy a
    ``hasattr(obj, "__iter__")`` check; actually iterating the object would fail.
    """
    return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage.
# The API key is read from the environment instead of being embedded in the
# notebook, so the notebook can be shared or committed without leaking it.
# NOTE: any key that was ever pasted into this cell should be treated as
# compromised and rotated.
cos_api_key_id = os.environ.get("IBM_COS_API_KEY_ID")
if not cos_api_key_id:
    raise RuntimeError(
        "Set the IBM_COS_API_KEY_ID environment variable to your "
        "IBM Cloud Object Storage API key before running this cell."
    )

cos_client = ibm_boto3.client(service_name='s3',
    ibm_api_key_id=cos_api_key_id,
    ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3.private.eu-de.cloud-object-storage.appdomain.cloud')

bucket = 'ctr-donotdelete-pr-kp5ryzb5c6gmwg'
object_key = 'Click.csv'

# Fetch the object; 'Body' is the response's streaming payload.
body = cos_client.get_object(Bucket=bucket,Key=object_key)['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

# Parse the CSV stream into a DataFrame and preview the first 10 rows.
df_1 = pd.read_csv(body)
df_1.head(10)
Out[4]:
Daily Time Spent on Site Age Area Income Daily Internet Usage Ad Topic Line City Gender Country Timestamp Clicked on Ad
0 62.26 32.0 69481.85 172.83 Decentralized real-time circuit Lisafort Male Svalbard & Jan Mayen Islands 2016-06-09 21:43:05 0
1 41.73 31.0 61840.26 207.17 Optional full-range projection West Angelabury Male Singapore 2016-01-16 17:56:05 0
2 44.40 30.0 57877.15 172.83 Total 5thgeneration standardization Reyesfurt Female Guadeloupe 2016-06-29 10:50:45 0
3 59.88 28.0 56180.93 207.17 Balanced empowering success New Michael Female Zambia 2016-06-21 14:32:32 0
4 49.21 30.0 54324.73 201.58 Total 5thgeneration standardization West Richard Female Qatar 2016-07-21 10:54:35 1
5 51.30 26.0 51463.17 131.68 Focused multi-state workforce Port Maria Female Cameroon 2016-05-15 13:18:34 0
6 66.08 43.0 73538.09 136.40 Optimized upward-trending productivity Port Jeffrey Male Turkey 2016-04-03 21:13:46 1
7 36.08 26.0 74903.41 228.78 Programmable uniform website East Kevinbury Male French Guiana 2016-03-11 06:49:10 0
8 46.14 33.0 43974.49 196.77 Customizable tangible hierarchy Lake Annashire Male Vanuatu 2016-03-24 06:36:52 0
9 51.65 51.0 74535.94 188.56 Function-based incremental standardization Edwardmouth Female Cameroon 2016-01-31 05:12:44 0
In [2]:
# Show the last rows of the frame (tail() defaults to 5 rows).
df_1.tail()
Out[2]:
Daily Time Spent on Site Age Area Income Daily Internet Usage Ad Topic Line City Gender Country Timestamp Clicked on Ad
9995 41.73 31.0 61840.26 207.17 Profound executive flexibility West Angelabury Male Singapore 2016-01-03 03:22:15 1
9996 41.73 28.0 51501.38 120.49 Managed zero tolerance concept Kennedyfurt Male Luxembourg 2016-05-28 12:20:15 0
9997 55.60 39.0 38067.08 124.44 Intuitive exuding service-desk North Randy Female Egypt 2016-01-05 11:53:17 0
9998 46.61 50.0 43974.49 123.13 Realigned content-based leverage North Samantha Female Malawi 2016-04-04 07:07:46 1
9999 46.61 43.0 60575.99 198.45 Optimized upward-trending productivity Port Jeffrey Male Northern Mariana Islands 2016-04-03 21:13:46 1
In [5]:
# Print column names, non-null counts and dtypes to check for missing values.
df_1.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Daily Time Spent on Site  10000 non-null  float64
 1   Age                       10000 non-null  float64
 2   Area Income               10000 non-null  float64
 3   Daily Internet Usage      10000 non-null  float64
 4   Ad Topic Line             10000 non-null  object 
 5   City                      10000 non-null  object 
 6   Gender                    10000 non-null  object 
 7   Country                   10000 non-null  object 
 8   Timestamp                 10000 non-null  object 
 9   Clicked on Ad             10000 non-null  int64  
dtypes: float64(4), int64(1), object(5)
memory usage: 781.4+ KB
In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df_1.describe()
Out[6]:
Daily Time Spent on Site Age Area Income Daily Internet Usage Clicked on Ad
count 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000
mean 61.660757 35.940100 53840.047721 177.759831 0.491700
std 15.704142 8.572973 13343.708718 40.820951 0.499956
min 32.600000 19.000000 13996.500000 105.220000 0.000000
25% 48.860000 29.000000 44052.302500 140.150000 0.000000
50% 59.590000 35.000000 56180.930000 178.920000 0.000000
75% 76.580000 42.000000 61840.260000 212.670000 1.000000
max 90.970000 60.000000 79332.330000 269.960000 1.000000
In [2]:
# Plotly imports used by the chart cells.
# NOTE(review): `go` is not referenced by any cell visible in this notebook.
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
# Display the name of the default Plotly template currently in effect.
pio.templates.default
Out[2]:
'plotly'
In [7]:
# Turn the 0/1 target into "No"/"Yes" labels for readable chart legends.
# The identity entries ("No" -> "No", "Yes" -> "Yes") make this cell safe to
# re-run: without them a second execution would map the already-converted
# labels to NaN, because Series.map yields NaN for keys missing from the dict.
df_1['Clicked on Ad'] = df_1["Clicked on Ad"].map(
    {0: "No", 1: "Yes", "No": "No", "Yes": "Yes"}
)
In [8]:
# Box plot of daily time spent on site, one box per "Clicked on Ad" class.
# Title fixed to read "based on ...", matching the other charts' titles.
fig = px.box(
    df_1,
    x="Daily Time Spent on Site",
    color="Clicked on Ad",
    title="Click through rate based on time spent on site",
    color_discrete_map={'Yes': 'Pink', 'No': 'Blue'},
)
# Use Plotly's "exclusive" quartile computation instead of the default.
fig.update_traces(quartilemethod="exclusive")
fig.show()
In [13]:
# Box plot of daily internet usage, one box per "Clicked on Ad" class.
fig = px.box(
    data_frame=df_1,
    x="Daily Internet Usage",
    color="Clicked on Ad",
    title="Click through rate based on Daily Internet Usage",
    color_discrete_map=dict(Yes="Pink", No="Blue"),
)
# Quartiles are computed with Plotly's "exclusive" method.
fig.update_traces(quartilemethod="exclusive")
fig.show()
In [9]:
# Box plot of visitor age, one box per "Clicked on Ad" class.
fig = px.box(
    data_frame=df_1,
    x="Age",
    color="Clicked on Ad",
    title="Click through rate based on Age",
    color_discrete_map=dict(Yes="Pink", No="Blue"),
)
# Quartiles are computed with Plotly's "exclusive" method.
fig.update_traces(quartilemethod="exclusive")
fig.show()
In [12]:
# Box plot of area income, one box per "Clicked on Ad" class.
fig = px.box(
    data_frame=df_1,
    x="Area Income",
    color="Clicked on Ad",
    title="Click Through Rate based on Income",
    color_discrete_map=dict(Yes="Pink", No="Blue"),
)
# Quartiles are computed with Plotly's "exclusive" method.
fig.update_traces(quartilemethod="exclusive")
fig.show()
In [10]:
# Count how many rows fall into each "Clicked on Ad" class.
df_1['Clicked on Ad'].value_counts()
Out[10]:
No     5083
Yes    4917
Name: Clicked on Ad, dtype: int64
In [11]:
# Click-through rate: percentage of rows in which the ad was clicked.
# Derived from the data instead of hand-copied counts so it stays correct if
# the dataset changes. Both the labelled ("Yes") and the raw (1) encoding of
# the target are accepted, so the result does not depend on whether the
# label-mapping cell has been run.
click_through_rate = df_1["Clicked on Ad"].isin(["Yes", 1]).mean() * 100
print(click_through_rate)
49.17
In [ ]: